********************************************************************************
************************* Extracting code lists ********************************
******************************* Chapter 4  *************************************



*     Setting up globals so that folders are easier to reference in code; replace each with your own file path
*       cons  - folder with the Observation .txt extracts; the Observation and osteoporosis .dta files are saved here too
*       codes - folder with the codelist .dta files (gabapentinoids.dta, osteoporosis.dta)
*       drugs - folder with the Drug Issue .txt extracts; the Drug Issue and gabapentinoid .dta files are saved here too
*     NOTE(review): cons and drugs end with a backslash but codes does not, and every later path adds
*     another backslash after the global, so cons and drugs paths contain a doubled backslash - confirm
*     this is accepted on your system, or drop the trailing backslash when you enter your own paths
global cons "My file path:\...\CPRD data\Observation\"
global codes "My file path:\...\CPRD data\Codelists\data"
global drugs "My file path:\...\CPRD data\Drug Issue\"









******************************* Example 1  *************************************

***************** Step 1: Format the Drug Issue .txt files *********************
*     Note: in this example we have 13 Drug Issue files; change the loop ranges below to match the number of Drug Issue files you have

*     Files 1-9: the extract file names end in a three-digit number, so a "00" prefix is added here.
*     Every column is imported as a string (dates are converted to Stata dates in Step 3) and
*     double quotes in the data are not used to bind fields. Each file is saved as DI00<i>.dta.
forvalues i = 1/9 {
	import delimited using "$drugs\IS_IA_ ..._Extract_DrugIssue_00`i'.txt", stringcols(_all) bindquotes(nobind) clear
	save "$drugs\DI00`i'.dta", replace
	clear
}

*     Files 10-13: two-digit file numbers only need a single "0" prefix in the .txt file name.
*     The saved name keeps the "DI00" prefix (for example DI0010.dta) because the Step 2 loop
*     builds every file name as DI00<i>.dta.
forvalues i = 10/13 {
	import delimited using "$drugs\IS_IA_ ..._Extract_DrugIssue_0`i'.txt", stringcols(_all) bindquotes(nobind) clear
	save "$drugs\DI00`i'.dta", replace
	clear
}

*     If you have 100+ Drug Issue files, this loop imports files 100 up to your last file.
*     Set the local n below to the number of your last file. While n is below 100 the loop
*     runs zero times, so it is safe to leave in place when you have fewer files.
*     With 100+ files, remember to also extend the 10/13 loop above to 10/99 and the loops
*     in Step 2 to run up to n.
local n = 13
forvalues i = 100/`n' {
	import delimited using "$drugs\IS_IA_ ..._Extract_DrugIssue_`i'.txt", stringcols(_all) bindquotes(nobind) clear
	save "$drugs\DI00`i'.dta", replace
	clear
}




***************** Step 2: Extract the Gabapentinoid codelist *******************
*     The code below is assuming we have 13 drug issue files, change this number to the amount of files you have
*     We want to keep those drug issue events that match the gabapentinoids codelist
*     For each Drug Issue file: load it, keep only the rows whose prodcodeid appears in the
*     gabapentinoids codelist, and save the result as gabapentinoid00<i>.dta.
*     m:1 requires prodcodeid to identify a single row in the codelist file.
forvalues i = 1/13 {
	* use takes the clear option (replace is not allowed here)
	use "$drugs\DI00`i'.dta", clear
	* keep(match) keeps matched rows only; nogenerate avoids creating _merge
	merge m:1 prodcodeid using "$codes\gabapentinoids.dta", keep(match) nogenerate
	save "$drugs\gabapentinoid00`i'.dta", replace
}

*     Stacking the per-file extracts into one usable dataset:
*     start from part 1, then append parts 2 to 13 underneath it
clear
use "$drugs\gabapentinoid001.dta", clear
foreach i of numlist 2/13 {
	append using "$drugs\gabapentinoid00`i'.dta"
}




***************** Step 3: Converting string dates to stata readable dates ******
*     Parse the day-month-year strings into Stata daily dates and display them as dates
generate issuedate1 = date(issuedate, "DMY")
generate enterdate1 = date(enterdate, "DMY")
format issuedate1 enterdate1 %td

*     Where the issue date is missing, fall back to the entry date
replace issuedate1 = enterdate1 if missing(issuedate1)




***************** Step 4: Checking for duplicates ******************************
*     Reports how many rows share the same patid, issueid and issue date
duplicates report patid issueid issuedate1




***************** Step 5: Save file to Working folder **************************
*     Replace file path with your file path to the Working folder created in Introduction chapter
*     The replace option lets this do-file be re-run without stopping because the file already exists
save "My file path:\....\CPRD data\Stata files\Working\gabapentinoid.dta", replace
















******************************* Example 2  *************************************


***************** Step 1: Format the Observation .txt files
*     Note: in this example we have 16 Observation files; change the loop ranges below to match the number of Observation files you have

*     Files 1-9: the extract file names end in a three-digit number, so a "00" prefix is added here.
*     Every column is imported as a string (dates are converted to Stata dates in Step 3) and
*     double quotes in the data are not used to bind fields. Each file is saved as O00<i>.dta.
forvalues i = 1/9 {
	import delimited using "$cons\IS_IA_ ..._Extract_Observation_00`i'.txt", stringcols(_all) bindquotes(nobind) clear
	save "$cons\O00`i'.dta", replace
	clear
}

*     Files 10-16: two-digit file numbers only need a single "0" prefix in the .txt file name.
*     The saved name keeps the "O00" prefix (for example O0010.dta) because the Step 2 loop
*     builds every file name as O00<i>.dta.
forvalues i = 10/16 {
	import delimited using "$cons\IS_IA_ ..._Extract_Observation_0`i'.txt", stringcols(_all) bindquotes(nobind) clear
	save "$cons\O00`i'.dta", replace
	clear
}




***************** Step 2: Extract the Osteoporosis codelist ********************

*     For each Observation file: load it, keep only the rows whose medcodeid appears in the
*     osteoporosis codelist, and save the result as osteoporosis00<i>.dta.
*     m:1 requires medcodeid to identify a single row in the codelist file.
forvalues i = 1/16 {
	* use takes the clear option (replace is not allowed here)
	use "$cons\O00`i'.dta", clear
	* keep(match) keeps matched rows only; nogenerate avoids creating _merge
	merge m:1 medcodeid using "$codes\osteoporosis.dta", keep(match) nogenerate
	save "$cons\osteoporosis00`i'.dta", replace
}
clear

*     Stacking the per-file extracts into one usable dataset:
*     start from part 1, then append parts 2 to 16 underneath it
use "$cons\osteoporosis001.dta", clear
foreach i of numlist 2/16 {
	append using "$cons\osteoporosis00`i'.dta"
}




***************** Step 3: Converting dates *************************************
*     Parse the day-month-year strings into Stata daily dates and display them as dates
generate obsdate1 = date(obsdate, "DMY")
generate enterdate1 = date(enterdate, "DMY")
format obsdate1 enterdate1 %td

*     Where the observation date is missing, fall back to the entry date
replace obsdate1 = enterdate1 if missing(obsdate1)




***************** Step 4: Duplicates report ************************************
*     Reports how many rows share the same patid, obsid and observation date
duplicates report patid obsid obsdate1




***************** Step 5: Save file to Working folder **************************
*     Replace file path with your file path to the Working folder created in Introduction chapter
*     The replace option lets this do-file be re-run without stopping because the file already exists
save "My file path:\....\CPRD data\Stata files\Working\osteoporosis.dta", replace


